#!/usr/bin/env python3
# -*- coding:utf-8 -*-
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# --- Preparing data ---
# Load the CSV and keep only the rows whose 'dataset' column equals "II".
data = pd.read_csv('anscombe.csv')
data = data[data['dataset'] == "II"]

# Take an explicit copy before adding a column: assigning into a frame that
# was derived by indexing another frame triggers SettingWithCopyWarning on
# pandas versions without Copy-on-Write.
df2 = data[['y', 'x']].copy()
df2['x2'] = df2['x'] ** 2  # quadratic feature for the second-order fit

# Feature order matters downstream: coef_[0] belongs to x**2, coef_[1] to x.
X = df2[['x2', 'x']]
y = df2['y']

# Ordinary least squares on [x**2, x] => y = a*x**2 + b*x + c.
clf = LinearRegression()
clf.fit(X, y)
# --- Visualization ---
plt.figure(figsize=(8, 5))

# Evaluate the fitted quadratic on a dense grid so the curve looks smooth.
# NOTE(review): the 4..14 range is hard-coded; presumably it is meant to span
# the x values of the selected dataset -- confirm if the input data changes.
xx = np.linspace(4, 14, 100)
# coef_[0] multiplies x**2 and coef_[1] multiplies x, matching the column
# order ['x2', 'x'] the model was fitted with.
yy = clf.coef_[0] * xx**2 + clf.coef_[1] * xx + clf.intercept_

# The '+' sign option always prints an explicit sign, so the terms are joined
# correctly whatever the sign of each coefficient (the previous hard-coded
# "+" produced "+-..." for a negative linear term and dropped the operator
# in front of a positive intercept).
equation = (
    f"y = {clf.coef_[0]:.4f}" + '${x^2}$'
    + f"{clf.coef_[1]:+.4f}x{clf.intercept_:+.4f}"
)

# Labels are attached to the artists themselves so the legend cannot get out
# of sync with the plotting order.
plt.plot(xx, yy, 'r--', label=equation)
plt.scatter(df2['x'], df2['y'], label='data')
plt.xlabel('x')
plt.ylabel('y')
# Training-set mean squared error of the fit, in scientific notation.
plt.title(f"MSE = {mean_squared_error(y, clf.predict(X)):e}")
plt.legend()
plt.show()
